library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)
SFARI_colour_hue = function(r) {
pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}
Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]
# Old SFARI Genes
old_SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
old_SFARI_genes = old_SFARI_genes[!duplicated(old_SFARI_genes$ID) & !is.na(old_SFARI_genes$ID),]
# Clusterings
clusterings = read_csv('./../Data/clusters.csv')
# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
left_join(old_SFARI_genes %>% rename(`gene-score` = 'old-gene-score') %>%
dplyr::select(ID, `old-gene-score`), by = 'ID') %>%
mutate(`old-gene-score`=ifelse(is.na(`old-gene-score`), 'Others', `old-gene-score`)) %>%
left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`),
old.gene.score=ifelse(`old-gene-score`=='Others' & Neuronal==1,'Neuronal',`old-gene-score`),
significant=padj<0.05 & !is.na(padj))
# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)
genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))
clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]
dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]
dataset$gene.score = as.character(dataset$gene.score)
dataset$gene.score[dataset$gene.score=='None'] = 'Others'
rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)
plot_data = dataset %>% dplyr::select(Module, MTcor) %>% distinct %>%
mutate(alpha = ifelse(abs(MTcor)>0.7, 1, 0.3))
top_modules = plot_data %>% arrange(desc(MTcor)) %>% filter(abs(MTcor)>0.7) %>% pull(Module) %>% as.character
Selecting Modules with a Module-Diagnosis correlation magnitude larger than 0.7 (instead of 0.9 as we did with Gandal’s dataset because the relation is not as strong in this dataset)
The 4 modules that fulfill this condition are #F97474, #00BC53, #FF699E, #F8766D
ggplotly(plot_data %>% ggplot(aes(reorder(Module, -MTcor), MTcor)) +
geom_bar(stat='identity', fill = plot_data$Module, alpha = plot_data$alpha) +
geom_hline(yintercept =c(0.7, -0.7), color = 'gray', linetype = 'dotted') +
xlab('Modules')+ ylab('Module-Diagnosis Correlation') + theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)))
The modules consist mainly of points with high (absolute) values in PC2 (which we know is related to LFC), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules
The genes belonging to the modules with the positive Module-Diagnosis correlation have positive LFC values and the genes belonging to the modules with the negative Module-Diagnosis correlation have negative values
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>%
left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
alpha = ifelse(ImportantModules=='Others', 0.1, 0.6),
gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
apply_labels(ImportantModules = 'Top Modules')
cro(plot_data$ImportantModules)
| #Total | |
|---|---|
| Top Modules | |
| #00BC53 | 489 |
| #F8766D | 20 |
| #F97474 | 354 |
| #FF699E | 224 |
| Others | 12075 |
| #Total cases | 13162 |
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) +
geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
ggtitle('Genes belonging to the Modules with the strongest relation to ASD'))
rm(pca)
List of top 20 SFARI Genes in top modules ordered by SFARI score and Gene Significance
list_top_SFARI_genes = function(table_data, module) {
t = table_data %>% filter(Module == module & `SFARI score` %in% c(1,2,3)) %>%
slice_head(n=20) %>% dplyr::select(-Module, -`Ensembl ID`)
return(t)
}
table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>%
arrange(gene.score, desc(abs(GS))) %>%
dplyr::rename('Ensembl ID' = ID, 'Gene Symbol' = external_gene_id,
'SFARI score' = gene.score, 'Gene Significance' = GS)
kable(list_top_SFARI_genes(table_data, top_modules[1]),
caption=paste0('Top SFARI Genes for Module ', top_modules[1])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| PTEN | 0.3138876 | 1 |
| KATNAL2 | 0.2364679 | 1 |
| SUV420H1 | 0.1769425 | 1 |
| SMAD4 | 0.3850305 | 2 |
| JARID2 | 0.3843220 | 2 |
| AGO4 | 0.3745789 | 2 |
| HLA-DRB1 | 0.5405759 | 3 |
| HLA-DPB1 | 0.4036844 | 3 |
| CECR2 | 0.3503966 | 3 |
| C16orf13 | 0.3360302 | 3 |
| ZNF548 | 0.2834165 | 3 |
| PPP2R1B | 0.2589386 | 3 |
| GRIK4 | 0.0706853 | 3 |
kable(list_top_SFARI_genes(table_data, top_modules[2]),
caption=paste0('Top SFARI Genes for Module ', top_modules[2])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| NIPBL | 0.4365605 | 1 |
| ADNP | 0.3880257 | 1 |
| DMPK | 0.3116284 | 1 |
| WDFY3 | 0.2587197 | 1 |
| CDKL5 | 0.2568833 | 1 |
| NF1 | 0.1317979 | 1 |
| ACTB | 0.1162313 | 1 |
| PAK2 | 0.5506436 | 2 |
| ANXA1 | 0.3747179 | 2 |
| OXTR | 0.2921324 | 2 |
| RALGAPB | 0.1421652 | 2 |
| SMG6 | 0.4836250 | 3 |
| LRRC1 | 0.4508373 | 3 |
| MAOB | 0.4289618 | 3 |
| CHD1 | 0.4122330 | 3 |
| AZGP1 | 0.4093596 | 3 |
| GLO1 | 0.3904694 | 3 |
| SCFD2 | 0.3704353 | 3 |
| NRP2 | 0.3160518 | 3 |
| RNF25 | 0.3130442 | 3 |
kable(list_top_SFARI_genes(table_data, top_modules[3]),
caption=paste0('Top SFARI Genes for Module ', top_modules[3])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| SLC6A1 | -0.5020894 | 1 |
| RERE | -0.4189176 | 1 |
| SMARCC2 | -0.1717047 | 1 |
| NR4A2 | -0.1167603 | 1 |
| DPP10 | -0.5611000 | 2 |
| CLASP1 | -0.4086528 | 2 |
| LEO1 | -0.4057350 | 2 |
| SNX5 | -0.1114436 | 2 |
| DAGLA | -0.7411211 | 3 |
| CDH22 | -0.6501558 | 3 |
| CASC4 | -0.5128066 | 3 |
| ABAT | -0.4491727 | 3 |
| YEATS2 | -0.2742193 | 3 |
| RAB11FIP5 | -0.2445716 | 3 |
| MEMO1 | -0.2183056 | 3 |
| DLX2 | -0.1694310 | 3 |
kable(list_top_SFARI_genes(table_data, top_modules[4]),
caption=paste0('Top SFARI Genes for Module ', top_modules[4])) %>%
kable_styling(full_width = F)
| Gene Symbol | Gene Significance | SFARI score |
|---|---|---|
| ZC3H4 | -0.5057941 | 2 |
| ICA1 | -0.2742045 | 2 |
rm(table_data, list_top_SFARI_genes)
Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control
In all cases, the Eigengenes separate the behaviour between autism and control samples very clearly (p-value < \(10^{-4}\)). Modules with positive Module-Diagnosis correlation have higher eigengenes in the ASD samples and Modules with a negative correlation, in the Control samples
plot_EGs = function(module){
plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME', module)],
'Diagnosis' = datMeta$Diagnosis)
p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) +
geom_boxplot(outlier.colour='#cccccc', outlier.shape='o', outlier.size=3) +
ggtitle(paste0('Module ', module, ' (MTcor=',round(dataset$MTcor[dataset$Module == module][1],2),')')) +
stat_compare_means(method = 't.test', method.args = list(var.equal = FALSE), label = 'p.signif',
ref.group = 'ASD') +
ylab('Module Eigengenes') + theme_minimal() + theme(legend.position='none')
return(p)
}
# Calculate Module Eigengenes
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)
p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
p3 = plot_EGs(top_modules[3])
p4 = plot_EGs(top_modules[4])
grid.arrange(p1, p2, p3, p4, nrow=2)
rm(plot_EGs, ME_object, MEs, p1, p2, p3, p4)
In the WGCNA pipeline, the most representative genes of each module are selected based on having a high module membership as well as a high (absolute) gene significance, so I’m going to do the same
SFARI Genes don’t seem to be more representative than the rest of the genes
create_plot = function(module){
plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module)
colnames(plot_data)[2] = 'Module'
SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='Others'])))
p = ggplotly(plot_data %>% mutate(gene.score = ifelse(gene.score =='Others', 'Not in SFARI', gene.score)) %>%
ggplot(aes(Module, GS, color=gene.score)) +
geom_point(alpha=0.5, aes(ID=ID)) + xlab('Module Membership') + ylab('Gene Significance') +
ggtitle(paste0('Module ', module, ' (MTcor = ',
round(dataset$MTcor[dataset$Module == module][1],2),')')) +
scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,7))) + theme_minimal())
return(p)
}
create_plot(top_modules[1])
create_plot(top_modules[2])
create_plot(top_modules[3])
create_plot(top_modules[4])
rm(create_plot)
Ordered by \(\frac{MM+|GS|}{2}\)
There aren’t that many SFARI genes in the top genes of the modules
create_table = function(module){
top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>%
mutate(Relevance = (MM+abs(GS))/2,
gene.score = ifelse(gene.score =='Others', 'Not in SFARI', gene.score)) %>%
arrange(by=-Relevance) %>% top_n(20) %>%
dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
return(top_genes)
}
top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])
kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| TCF7L1 | 0.7771321 | 0.6835251 | Not in SFARI | 0.7303286 |
| RHOC | 0.7754553 | 0.6721907 | Not in SFARI | 0.7238230 |
| BCL7C | 0.7413363 | 0.6452049 | Not in SFARI | 0.6932706 |
| CLN5 | 0.7697223 | 0.6153303 | Not in SFARI | 0.6925263 |
| KLHL36 | 0.7305701 | 0.5941385 | Not in SFARI | 0.6623543 |
| IQCK | 0.7174191 | 0.5997755 | Not in SFARI | 0.6585973 |
| PPCDC | 0.7433214 | 0.5665903 | Not in SFARI | 0.6549559 |
| FERMT2 | 0.6618375 | 0.6283392 | Not in SFARI | 0.6450883 |
| METTL9 | 0.6835214 | 0.6013762 | Not in SFARI | 0.6424488 |
| SNTB2 | 0.6142474 | 0.6655963 | Not in SFARI | 0.6399218 |
| ASCL1 | 0.6150432 | 0.6644773 | Not in SFARI | 0.6397602 |
| APLNR | 0.6377913 | 0.6248271 | Not in SFARI | 0.6313092 |
| TAP1 | 0.6302934 | 0.6247122 | Not in SFARI | 0.6275028 |
| ITGB4 | 0.6209799 | 0.6290636 | Not in SFARI | 0.6250218 |
| ENTPD2 | 0.6704765 | 0.5744062 | Not in SFARI | 0.6224413 |
| DENND2C | 0.5496190 | 0.6933139 | Not in SFARI | 0.6214665 |
| LYRM1 | 0.7408410 | 0.4953295 | Not in SFARI | 0.6180853 |
| DTX3L | 0.5745838 | 0.6590922 | Not in SFARI | 0.6168380 |
| TRPS1 | 0.5455052 | 0.6822391 | Not in SFARI | 0.6138722 |
| MYO1C | 0.6942435 | 0.5260777 | Not in SFARI | 0.6101606 |
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| S100A10 | 0.7600843 | 0.8259635 | Not in SFARI | 0.7930239 |
| TIMP1 | 0.8528491 | 0.6658584 | Not in SFARI | 0.7593538 |
| SERPINA3 | 0.7349599 | 0.6723833 | Not in SFARI | 0.7036716 |
| POLR2H | 0.7130493 | 0.6814970 | Not in SFARI | 0.6972732 |
| PICALM | 0.7934017 | 0.5890945 | Not in SFARI | 0.6912481 |
| EIF4EBP1 | 0.6782985 | 0.6997893 | Not in SFARI | 0.6890439 |
| TNFRSF1A | 0.7980125 | 0.5799478 | Not in SFARI | 0.6889801 |
| RARRES3 | 0.5956617 | 0.7705862 | Not in SFARI | 0.6831239 |
| VAMP5 | 0.7129454 | 0.6335556 | Not in SFARI | 0.6732505 |
| SSR3 | 0.6394790 | 0.6877353 | Not in SFARI | 0.6636071 |
| RNF103 | 0.7323857 | 0.5865942 | Not in SFARI | 0.6594900 |
| CHI3L1 | 0.6887100 | 0.6194934 | Not in SFARI | 0.6541017 |
| LRAT | 0.7085019 | 0.5864767 | Not in SFARI | 0.6474893 |
| COL27A1 | 0.6438934 | 0.6433710 | Not in SFARI | 0.6436322 |
| GFPT2 | 0.7674076 | 0.5150615 | Not in SFARI | 0.6412346 |
| C1R | 0.6110063 | 0.6622772 | Not in SFARI | 0.6366418 |
| TGFB2 | 0.6822857 | 0.5901051 | Not in SFARI | 0.6361954 |
| PLEKHA4 | 0.5243532 | 0.7405088 | Not in SFARI | 0.6324310 |
| ATP6V0E1 | 0.7262139 | 0.5297029 | Not in SFARI | 0.6279584 |
| GDI2 | 0.6452767 | 0.6044045 | Not in SFARI | 0.6248406 |
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| PTPRN2 | 0.9066040 | -0.7775306 | Not in SFARI | 0.8420673 |
| DAGLA | 0.8842467 | -0.7411211 | 3 | 0.8126839 |
| CALM3 | 0.7801869 | -0.6645951 | Not in SFARI | 0.7223910 |
| CAMTA2 | 0.7670562 | -0.6765830 | Not in SFARI | 0.7218196 |
| LZTS3 | 0.7631089 | -0.6391308 | Not in SFARI | 0.7011198 |
| SEPT5 | 0.6861954 | -0.6618160 | Not in SFARI | 0.6740057 |
| CDH22 | 0.6886071 | -0.6501558 | 3 | 0.6693814 |
| LYNX1 | 0.6139214 | -0.7142691 | Not in SFARI | 0.6640952 |
| MPPED1 | 0.6849524 | -0.6399206 | Not in SFARI | 0.6624365 |
| REXO1 | 0.6839090 | -0.6285212 | Not in SFARI | 0.6562151 |
| PLEKHA6 | 0.6969928 | -0.5628770 | Not in SFARI | 0.6299349 |
| PIAS2 | 0.6612106 | -0.5897259 | Not in SFARI | 0.6254682 |
| BAG6 | 0.6170320 | -0.6193808 | Not in SFARI | 0.6182064 |
| SDR16C5 | 0.5390447 | -0.6739748 | Not in SFARI | 0.6065097 |
| DPP10 | 0.6484100 | -0.5611000 | 2 | 0.6047550 |
| ATXN7L3 | 0.6846258 | -0.5243468 | Not in SFARI | 0.6044863 |
| AMPD2 | 0.6583284 | -0.5172108 | Not in SFARI | 0.5877696 |
| GLT1D1 | 0.6539256 | -0.5211396 | Not in SFARI | 0.5875326 |
| ADRBK1 | 0.6320813 | -0.5403402 | Not in SFARI | 0.5862108 |
| SLC6A1 | 0.6564682 | -0.5020894 | 1 | 0.5792788 |
kable(top_genes[[4]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[4],
' (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[4]][1],2),')')) %>%
kable_styling(full_width = F)
| Gene Symbol | MM | GS | SFARI Score | Relevance |
|---|---|---|---|---|
| GPRASP1 | 0.7717372 | -0.8019529 | Not in SFARI | 0.7868450 |
| CHGB | 0.8556614 | -0.6575324 | Not in SFARI | 0.7565969 |
| MYBBP1A | 0.7757858 | -0.6746362 | Not in SFARI | 0.7252110 |
| SPTBN4 | 0.7464208 | -0.6773998 | Not in SFARI | 0.7119103 |
| ARMCX4 | 0.6279354 | -0.7234746 | Not in SFARI | 0.6757050 |
| LSM14B | 0.6904034 | -0.6334935 | Not in SFARI | 0.6619485 |
| ARMCX1 | 0.6318743 | -0.6030133 | Not in SFARI | 0.6174438 |
| PEX16 | 0.5831425 | -0.5849526 | Not in SFARI | 0.5840476 |
| ZC3H4 | 0.6617162 | -0.5057941 | 2 | 0.5837551 |
| IKBKG | 0.5821465 | -0.5696131 | Not in SFARI | 0.5758798 |
| NAP1L2 | 0.6519877 | -0.4792486 | Not in SFARI | 0.5656182 |
| SNX1 | 0.6346156 | -0.4962871 | Not in SFARI | 0.5654513 |
| TSPYL1 | 0.7482471 | -0.3806421 | Not in SFARI | 0.5644446 |
| CTTN | 0.6050875 | -0.4794393 | Not in SFARI | 0.5422634 |
| ZBTB8OS | 0.6117415 | -0.4081671 | Not in SFARI | 0.5099543 |
| BCL7A | 0.5459645 | -0.2186945 | Not in SFARI | 0.3823295 |
| ICA1 | 0.4894869 | -0.2742045 | 2 | 0.3818457 |
| SCN5A | 0.3588764 | -0.2723701 | Not in SFARI | 0.3156232 |
| LCMT2 | 0.2818653 | -0.2128282 | Not in SFARI | 0.2473468 |
| PRICKLE3 | 0.1882228 | -0.1050373 | Not in SFARI | 0.1466301 |
rm(create_table, i)
pca = datExpr %>% prcomp
ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))
plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) +
xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
theme_minimal() + ggtitle('Most relevant genes for top Modules')
rm(ids, pca, tg, plot_data)
Level of expression by Diagnosis for top genes, ordered by relevance (defined above): There is a visible difference in level of expression between diagnosis groups in all of these genes
create_plot = function(i){
plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>%
melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
left_join(top_genes[[i]], by='ID') %>%
left_join(datMeta %>% dplyr::select(ID, Diagnosis),
by = c('variable'='ID')) %>% arrange(desc(Relevance))
p = ggplotly(plot_data %>% mutate(external_gene_id=factor(`Gene Symbol`,
levels=unique(plot_data$`Gene Symbol`), ordered=T)) %>%
ggplot(aes(`Gene Symbol`, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
ggtitle(paste0('Top Genes for Module ', top_modules[i], ' (MTcor = ',
round(dataset$MTcor[dataset$Module == top_modules[i]][1],2), ')')) +
xlab('') + ylab('Level of Expression') +
theme(axis.text.x = element_text(angle = 90, hjust = 1)))
return(p)
}
create_plot(1)
create_plot(2)
create_plot(3)
create_plot(4)
rm(create_plot)
Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:
Gene Ontology
Disease Ontology
Disease Gene Network
KEGG
REACTOME
file_name = './../Data/GSEA.RData'
if(file.exists(file_name)){
load(file_name)
} else {
##############################################################################
# PREPARE DATASET
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
values=genes_info$ID[genes_info$Module!='gray'], mart=mart)
EA_dataset = biomart_output %>% dplyr::rename('ID' = ensembl_gene_id) %>%
left_join(dataset %>% dplyr::select(ID, contains('MM.')), by='ID')
##############################################################################
# PERFORM ENRICHMENT
# Following https://yulab-smu.github.io/clusterProfiler-book/chapter8.html
modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
nPerm = 1e5 # 100 times more than the default
enrichment_GO = list() # Gene Ontology
enrichment_DO = list() # Disease Ontology
enrichment_DGN = list() # Disease Gene Networks
enrichment_KEGG = list() # Kyoto Encyclopedia of Genes and Genomes
enrichment_Reactome = list() # Reactome: Pathway db
for(module in modules){
cat('\n')
cat(paste0('Module: ', which(modules == module), '/', length(modules)))
geneList = EA_dataset[,paste0('MM.',substring(module,2))]
names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
geneList = sort(geneList, decreasing = TRUE)
enrichment_GO[[module]] = gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni',
pvalueCutoff = 0.1, nPerm = nPerm, verbose = FALSE, seed = TRUE) %>%
data.frame
enrichment_DO[[module]] = gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% data.frame
enrichment_DGN[[module]] = gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
nPerm = nPerm, verbose = FALSE, seed = TRUE) %>% data.frame
enrichment_KEGG[[module]] = gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni',
pvalueCutoff = 0.1, nPerm = nPerm, verbose = FALSE, seed = TRUE) %>%
data.frame
enrichment_Reactome[[module]] = gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni',
pvalueCutoff = 0.1, nPerm = nPerm, verbose = F, seed = T) %>%
data.frame
# Temporal save, just in case SFARI Genes enrichment fails
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, file=file_name)
}
##############################################################################
# PERFROM ENRICHMENT FOR SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`gene-score` != 'Others','SFARI','Others'),
`gene-score` = ifelse(`gene-score` != 'Others',
paste0('SFARI Score ',`gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_SFARI = list()
cat('\n\nGSEA OF SFARI GENES\n')
for(module in modules){
cat('\n')
cat(paste0('Module: ', which(modules == module), '/', length(modules)))
geneList = EA_dataset[,paste0('MM.',substring(module,2))]
names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
geneList = sort(geneList, decreasing = TRUE)
enrichment_SFARI[[module]] = clusterProfiler::GSEA(geneList, pAdjustMethod = 'bonferroni', nPerm = nPerm,
TERM2GENE = term2gene, pvalueCutoff=1, maxGSSize=2e3,
verbose = FALSE, seed = TRUE) %>% data.frame
# Temporal save
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, file=file_name)
}
##############################################################################
# PERFROM ENRICHMENT FOR OLD SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `old-gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`old-gene-score` != 'Others','SFARI','Others'),
`old-gene-score` = ifelse(`old-gene-score` != 'Others',
paste0('SFARI Score ',`old-gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_old_SFARI = list()
cat('\n\nGSEA OF OLD SFARI GENES\n')
for(module in modules){
cat('\n')
cat(paste0('Module: ', which(modules == module), '/', length(modules)))
geneList = EA_dataset[,paste0('MM.',substring(module,2))]
names(geneList) = EA_dataset[,'entrezgene'] %>% as.character
geneList = sort(geneList, decreasing = TRUE)
enrichment_old_SFARI[[module]] = clusterProfiler::GSEA(geneList, pAdjustMethod = 'bonferroni',
nPerm = nPerm, TERM2GENE = term2gene, pvalueCutoff=1,
maxGSSize=2e3, verbose = FALSE, seed = TRUE) %>%
data.frame
# Temporal save
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, enrichment_old_SFARI, file=file_name)
}
##############################################################################
# Save enrichment results
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, enrichment_old_SFARI, file=file_name)
rm(getinfo, mart, biomart_output, module, term2gene, geneList, EA_dataset, nPerm)
}
# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI
GSEA_old_SFARI = enrichment_old_SFARI
file_name = './../Data/ORA.RData'
if(file.exists(file_name)){
load(file_name)
} else {
##############################################################################
# PREPARE DATASET
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
values=genes_info$ID[genes_info$Module!='gray'], mart=mart)
EA_dataset = biomart_output %>% dplyr::rename('ID' = ensembl_gene_id) %>%
left_join(dataset %>% dplyr::select(ID, Module), by='ID')
##############################################################################
# PERFORM ENRICHMENT
# Following https://yulab-smu.github.io/clusterProfiler-book/chapter8.html
modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
universe = EA_dataset$entrezgene %>% as.character
enrichment_GO = list() # Gene Ontology
enrichment_DO = list() # Disease Ontology
enrichment_DGN = list() # Disease Gene Networks
enrichment_KEGG = list() # Kyoto Encyclopedia of Genes and Genomes
enrichment_Reactome = list() # Reactome: Pathway db
for(module in modules){
genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
enrichment_GO[[module]] = enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db,
ont = 'All', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
qvalueCutoff = 1) %>% data.frame
enrichment_DO[[module]] = enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
enrichment_DGN[[module]] = enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
enrichment_KEGG[[module]] = enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>% data.frame
enrichment_Reactome[[module]] = enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1) %>%
data.frame
}
# Temporal save, just in case SFARI Genes enrichment fails
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, file=file_name)
##############################################################################
# PERFROM ENRICHMENT FOR SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`gene-score` != 'Others','SFARI','Others'),
`gene-score` = ifelse(`gene-score` != 'Others',
paste0('SFARI Score ',`gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_SFARI = list()
for(module in modules){
genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
enrichment_SFARI[[module]] = enricher(gene = genes_in_module, universe = universe,
pAdjustMethod = 'bonferroni', TERM2GENE = term2gene,
pvalueCutoff = 1, qvalueCutoff = 1, maxGSSize = 50000) %>%
data.frame %>% dplyr::select(-geneID,-Description)
}
##############################################################################
# PERFROM ENRICHMENT FOR SFARI GENES
# BUILD MAPPING BETWEEN GENES AND SFARI
# Build TERM2GENE: dataframe of 2 columns with term and gene
term2gene = biomart_output %>%
left_join(genes_info %>% dplyr::select(ID, `old-gene-score`),
by = c('ensembl_gene_id'='ID')) %>% dplyr::select(-ensembl_gene_id) %>%
mutate('SFARI' = ifelse(`old-gene-score` != 'Others','SFARI','Others'),
`old-gene-score` = ifelse(`old-gene-score` != 'Others',
paste0('SFARI Score ',`old-gene-score`), 'Others')) %>%
melt(id.vars = 'entrezgene') %>% dplyr::select(value, entrezgene) %>%
dplyr::rename('term' = value, 'gene' = entrezgene) %>% distinct
# PERFORM GSEA
enrichment_old_SFARI = list()
for(module in modules){
genes_in_module = EA_dataset$entrezgene[EA_dataset$Module == module]
enrichment_old_SFARI[[module]] = enricher(gene = genes_in_module, universe = universe,
pAdjustMethod = 'bonferroni', TERM2GENE = term2gene,
pvalueCutoff = 1, qvalueCutoff = 1, maxGSSize = 5e4) %>%
data.frame %>% dplyr::select(-geneID,-Description)
}
##############################################################################
# Save enrichment results
save(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome,
enrichment_SFARI, enrichment_old_SFARI, file=file_name)
rm(getinfo, mart, biomart_output, gene, module, term2gene, genes_in_module, EA_dataset, universe, file_name)
}
## Batch submitting query [====>--------------------------] 17% eta: 2s
## Batch submitting query [=====>-------------------------] 21% eta: 2s
## Batch submitting query [=======>-----------------------] 25% eta: 2s
## Batch submitting query [========>----------------------] 29% eta: 1s
## Batch submitting query [=========>---------------------] 33% eta: 1s
## Batch submitting query [===========>-------------------] 38% eta: 1s
## Batch submitting query [============>------------------] 42% eta: 1s
## Batch submitting query [=============>-----------------] 46% eta: 1s
## Batch submitting query [===============>---------------] 50% eta: 1s
## Batch submitting query [================>--------------] 54% eta: 1s
## Batch submitting query [=================>-------------] 58% eta: 1s
## Batch submitting query [==================>------------] 62% eta: 1s
## Batch submitting query [====================>----------] 67% eta: 1s
## Batch submitting query [=====================>---------] 71% eta: 1s
## Batch submitting query [======================>--------] 75% eta: 1s
## Batch submitting query [========================>------] 79% eta: 1s
## Batch submitting query [=========================>-----] 83% eta: 0s
## Batch submitting query [==========================>----] 88% eta: 0s Batch
## submitting query [===========================>---] 92% eta: 0s Batch submitting
## query [=============================>-] 96% eta: 0s Batch submitting query
## [===============================] 100% eta: 0s
## Warning in rm(getinfo, mart, biomart_output, gene, module, term2gene,
## genes_in_module, : object 'gene' not found
# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI
ORA_old_SFARI = enrichment_old_SFARI
rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome, enrichment_SFARI,
enrichment_old_SFARI)
Both GSEA and ORA are commonly used to study enrichment in sets of genes, but when using them for studying our modules both have shortcomings:
GSEA takes into consideration some ordering of the genes, in this case given by their Module Membership, which is correlated to the membership of genes to the module, but has two problems:
Being a continuous scale, it doesn’t separate by a threshold the genes that are truly in the cluster from the rest
The Module Membership metric is correlated to the real membership of the module, but this correlation is not perfect: a high MM doesn’t always mean the gene belongs to that module, for example, selecting a random module, in the plot below we can see the MM distribution of genes belonging to that module against the rest of the genes belonging to other modules and, although in general, genes belonging to that module have a higher distribution of MM, there is still a big overlap between the two groups, making MM a noisy metric for performing GSEA
modules = dataset$Module[dataset$Module!='gray'] %>% as.character %>% table %>% names
module = sample(modules,1)
plot_data = dataset %>% dplyr::select(Module, paste0('MM.',gsub('#','',module))) %>%
mutate(in_module = substring(Module,2) == gsub('#','',module), selected_module = module) %>%
mutate(alpha = ifelse(in_module, 0.8, 0.1))
colnames(plot_data)[2] = 'MM'
p = plot_data %>% ggplot(aes(selected_module, MM, color = in_module)) + geom_jitter(alpha = plot_data$alpha) +
xlab('') + ylab('Module Membership') + coord_flip() + theme_minimal() +
theme(legend.position = 'none')
ggExtra::ggMarginal(p, type = 'density', groupColour = TRUE, groupFill = TRUE, margins = 'x', size=1)
rm(modules, module, p, plot_data)
So perhaps it could be useful to use both methods together, since they seem to complement each other’s shortcomings very well, performing the enrichment using both methods and identifying the terms that are found to be enriched by both
Note: Since the enrichment in both methods is quite a stric restriction, we decide to relax the corrected p-value threshold (using Bonferroni correction) to 0.1.
compare_methods = function(GSEA_list, ORA_list){
for(top_module in top_modules){
cat(paste0(' \n \nEnrichments for Module ', top_module, ' (MTcor=',
round(dataset$MTcor[dataset$Module==top_module][1],2), '): \n \n'))
GSEA = GSEA_list[[top_module]]
ORA = ORA_list[[top_module]]
cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms \n'))
cat(paste0('ORA has ', nrow(ORA), ' enriched terms \n'))
cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods \n \n'))
plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>%
dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID')
if(nrow(plot_data)>0){
print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>%
arrange(pval_mean) %>% dplyr::select(-pval_mean) %>%
kable %>% kable_styling(full_width = F))
}
}
}
plot_results = function(GSEA_list, ORA_list){
l = htmltools::tagList()
for(i in 1:length(top_modules)){
GSEA = GSEA_list[[top_modules[i]]]
ORA = ORA_list[[top_modules[i]]]
plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
if(nrow(plot_data)>5){
min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) +
geom_point(aes(id = Description)) +
geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') +
geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') +
ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
scale_x_continuous(limits = c(min_val, max_val)) +
scale_y_continuous(limits = c(min_val, max_val)) +
xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
l[[i]] = ggp
}
}
return(l)
}
compare_methods(GSEA_KEGG, ORA_KEGG)
Enrichments for Module #F97474 (MTcor=0.77):
GSEA has 9 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BC53 (MTcor=0.74):
GSEA has 13 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa04060 | Cytokine-cytokine receptor interaction | 2.112811 | 0.0054156 | 0.0279856 | 13/217 | 0.0277848 |
Enrichments for Module #FF699E (MTcor=-0.76):
GSEA has 10 enriched terms
ORA has 2 enriched terms
1 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| hsa04740 | Olfactory transduction | 2.104481 | 0.030246 | 0.0030324 | 6/96 | 0.0027776 |
Enrichments for Module #F8766D (MTcor=-0.78):
GSEA has 10 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
compare_methods(GSEA_Reactome, ORA_Reactome)
Enrichments for Module #F97474 (MTcor=0.77):
GSEA has 16 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BC53 (MTcor=0.74):
GSEA has 52 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods
Enrichments for Module #FF699E (MTcor=-0.76):
GSEA has 12 enriched terms
ORA has 4 enriched terms
0 terms are enriched in both methods
Enrichments for Module #F8766D (MTcor=-0.78):
GSEA has 14 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods
compare_methods(GSEA_GO, ORA_GO)
Enrichments for Module #F97474 (MTcor=0.77):
GSEA has 49 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BC53 (MTcor=0.74):
GSEA has 57 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #FF699E (MTcor=-0.76):
GSEA has 49 enriched terms
ORA has 2 enriched terms
0 terms are enriched in both methods
Enrichments for Module #F8766D (MTcor=-0.78):
GSEA has 14 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods
compare_methods(GSEA_DO, ORA_DO)
Enrichments for Module #F97474 (MTcor=0.77):
GSEA has 65 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BC53 (MTcor=0.74):
GSEA has 123 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods
| ID | Description | NES | pval_GSEA | pval_ORA | GeneRatio | qvalue |
|---|---|---|---|---|---|---|
| DOID:1909 | melanoma | 1.822677 | 0.010034 | 0.0498741 | 39/223 | 0.0470184 |
Enrichments for Module #FF699E (MTcor=-0.76):
GSEA has 33 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #F8766D (MTcor=-0.78):
GSEA has 118 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
compare_methods(GSEA_DGN, ORA_DGN)
Enrichments for Module #F97474 (MTcor=0.77):
GSEA has 88 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #00BC53 (MTcor=0.74):
GSEA has 125 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
Enrichments for Module #FF699E (MTcor=-0.76):
GSEA has 16 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods
Enrichments for Module #F8766D (MTcor=-0.78):
GSEA has 115 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] WGCNA_1.69 fastcluster_1.1.25 dynamicTreeCut_1.63-1
## [4] org.Hs.eg.db_3.8.2 AnnotationDbi_1.46.1 IRanges_2.18.3
## [7] S4Vectors_0.22.1 Biobase_2.44.0 BiocGenerics_0.30.0
## [10] DOSE_3.10.2 ReactomePA_1.28.0 clusterProfiler_3.12.0
## [13] biomaRt_2.40.5 kableExtra_1.1.0 knitr_1.28
## [16] doParallel_1.0.15 iterators_1.0.12 foreach_1.5.0
## [19] polycor_0.7-10 expss_0.10.2 ggpubr_0.2.5
## [22] magrittr_1.5 GGally_1.5.0 gridExtra_2.3
## [25] viridis_0.5.1 viridisLite_0.3.0 RColorBrewer_1.1-2
## [28] dendextend_1.13.4 plotly_4.9.2 glue_1.4.1
## [31] reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0
## [34] dplyr_1.0.0 purrr_0.3.4 readr_1.3.1
## [37] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.2
## [40] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.0 RSQLite_2.2.0
## [3] htmlwidgets_1.5.1 grid_3.6.3
## [5] BiocParallel_1.18.1 munsell_0.5.0
## [7] codetools_0.2-16 preprocessCore_1.46.0
## [9] miniUI_0.1.1.1 withr_2.2.0
## [11] colorspace_1.4-1 GOSemSim_2.10.0
## [13] highr_0.8 rstudioapi_0.11
## [15] ggsignif_0.6.0 labeling_0.3
## [17] urltools_1.7.3 GenomeInfoDbData_1.2.1
## [19] polyclip_1.10-0 bit64_0.9-7
## [21] farver_2.0.3 vctrs_0.3.1
## [23] generics_0.0.2 xfun_0.12
## [25] R6_2.4.1 GenomeInfoDb_1.20.0
## [27] graphlayouts_0.7.0 locfit_1.5-9.4
## [29] DelayedArray_0.10.0 bitops_1.0-6
## [31] reshape_0.8.8 fgsea_1.10.1
## [33] gridGraphics_0.5-0 assertthat_0.2.1
## [35] promises_1.1.0 scales_1.1.1
## [37] ggraph_2.0.3 nnet_7.3-14
## [39] enrichplot_1.4.0 ggExtra_0.9
## [41] gtable_0.3.0 tidygraph_1.2.0
## [43] rlang_0.4.6 genefilter_1.66.0
## [45] splines_3.6.3 lazyeval_0.2.2
## [47] acepack_1.4.1 impute_1.58.0
## [49] broom_0.5.5 europepmc_0.4
## [51] checkmate_2.0.0 BiocManager_1.30.10
## [53] yaml_2.2.1 modelr_0.1.6
## [55] crosstalk_1.1.0.1 backports_1.1.8
## [57] httpuv_1.5.2 qvalue_2.16.0
## [59] Hmisc_4.4-0 tools_3.6.3
## [61] ggplotify_0.0.5 ellipsis_0.3.1
## [63] ggridges_0.5.2 Rcpp_1.0.4.6
## [65] plyr_1.8.6 base64enc_0.1-3
## [67] progress_1.2.2 zlibbioc_1.30.0
## [69] RCurl_1.98-1.2 prettyunits_1.1.1
## [71] rpart_4.1-15 cowplot_1.0.0
## [73] SummarizedExperiment_1.14.1 haven_2.2.0
## [75] ggrepel_0.8.2 cluster_2.1.0
## [77] fs_1.4.0 data.table_1.12.8
## [79] DO.db_2.9 triebeard_0.3.0
## [81] reprex_0.3.0 reactome.db_1.68.0
## [83] matrixStats_0.56.0 mime_0.9
## [85] xtable_1.8-4 hms_0.5.3
## [87] evaluate_0.14 XML_3.99-0.3
## [89] jpeg_0.1-8.1 readxl_1.3.1
## [91] compiler_3.6.3 crayon_1.3.4
## [93] htmltools_0.4.0 later_1.0.0
## [95] Formula_1.2-3 geneplotter_1.62.0
## [97] lubridate_1.7.4 DBI_1.1.0
## [99] tweenr_1.0.1 dbplyr_1.4.2
## [101] MASS_7.3-51.6 rappdirs_0.3.1
## [103] Matrix_1.2-18 cli_2.0.2
## [105] igraph_1.2.5 GenomicRanges_1.36.1
## [107] pkgconfig_2.0.3 rvcheck_0.1.8
## [109] foreign_0.8-76 xml2_1.2.5
## [111] annotate_1.62.0 webshot_0.5.2
## [113] XVector_0.24.0 rvest_0.3.5
## [115] digest_0.6.25 graph_1.62.0
## [117] rmarkdown_2.1 cellranger_1.1.0
## [119] fastmatch_1.1-0 htmlTable_1.13.3
## [121] curl_4.3 shiny_1.4.0.2
## [123] graphite_1.30.0 lifecycle_0.2.0
## [125] nlme_3.1-147 jsonlite_1.7.0
## [127] fansi_0.4.1 pillar_1.4.4
## [129] lattice_0.20-41 fastmap_1.0.1
## [131] httr_1.4.1 survival_3.1-12
## [133] GO.db_3.8.2 UpSetR_1.4.0
## [135] png_0.1-7 bit_1.1-15.2
## [137] ggforce_0.3.1 stringi_1.4.6
## [139] blob_1.2.1 DESeq2_1.24.0
## [141] latticeExtra_0.6-29 memoise_1.1.0